package com.doit.shark.crawler.comment;/**
 * Created by hunter.coder 涛哥
 * 2019/4/15 16:16
 * 交流qq:657270652
 * Version: 1.0
 * 更多学习资料：https://blog.csdn.net/coderblack/
 * Description:
 **/

import com.hankcs.hanlp.HanLP;
import org.apache.commons.lang3.StringUtils;
import org.codehaus.jackson.map.ObjectMapper;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.io.BufferedWriter;
import java.io.FileWriter;
import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.HashMap;
import java.util.List;

/**
 * Crawler for JD.com product comments.
 *
 * <p>{@link #main} requests a fixed range of category listing pages, derives a product id
 * from each product link found there, and requests the comment feed for every accepted id.
 * Each response is printed to standard output; nothing is persisted.
 *
 * @Author hunter.coder
 * @Date 2019/4/15 16:16
 * @Version 1.0
 **/
public class CommentExtractor {

    /**
     * Category listing URL; the page number is appended.
     * NOTE(review): the original author labelled this crawl "mobile communications/digital &gt;
     * mobile communications &gt; mobile phones" - presumably that is what cat=9987,653,655
     * selects; confirm against the site.
     */
    private static final String LIST_URL = "https://list.jd.com/list.html?cat=9987,653,655&page=";

    /** Listing pages are requested from 1 up to and including this value. */
    private static final int LAST_LIST_PAGE = 2;

    /** Only product ids with exactly this many characters are crawled. */
    private static final int PRODUCT_ID_LENGTH = 11;

    /**
     * Entry point: crawls the listing pages and fetches comments for every accepted product.
     *
     * @param args ignored
     * @throws Exception if any page request fails; the crawl stops at the first failure
     */
    public static void main(String[] args) throws Exception {
        for (int page = 1; page <= LAST_LIST_PAGE; page++) {
            System.out.println("-----------------" + page + "---------------");
            Document doc = Jsoup.connect(LIST_URL + page).get();

            // NOTE(review): the argument holds two space-separated class names; whether jsoup
            // matches it depends on the class attribute being exactly this string - confirm
            // against the jsoup version in use (a ".gl-warp.clearfix" selector would not).
            Elements containers = doc.getElementsByClass("gl-warp clearfix");
            if (containers.isEmpty()) {
                // A page without the product list would otherwise abort the whole crawl with
                // an IndexOutOfBoundsException on get(0).
                System.err.println("No product list container found on list page " + page + ", skipping");
                continue;
            }

            // Only anchors whose title attribute is present and empty are considered.
            Elements aEles = containers.get(0).select("a[title='']");
            for (Element aEle : aEles) {
                String href = aEle.attr("href");
                String pid = parseProductId(href);
                if (pid != null && pid.length() == PRODUCT_ID_LENGTH) {
                    extractComment(href, pid);
                }
            }
        }
    }

    /**
     * Derives a product id from a protocol-relative link.
     *
     * <p>The id is taken from the fourth {@code /}-separated piece of the link (the first path
     * segment after the host) by cutting it one character before its last {@code 'h'}; for a
     * link shaped like {@code //host/12345678901.html} this yields {@code 12345678901}.
     *
     * @param href link target as read from the anchor's {@code href} attribute
     * @return the derived id, or {@code null} when the link does not start with {@code //},
     *         has no path segment, or the segment has no {@code 'h'} after its first character
     */
    private static String parseProductId(String href) {
        if (!href.startsWith("//")) {
            return null;
        }
        String[] parts = href.split("/", -1);
        if (parts.length < 4) {
            return null;
        }
        String segment = parts[3];
        int lastH = segment.lastIndexOf('h');
        if (lastH < 1) {
            // lastH - 1 would be negative and substring would throw.
            return null;
        }
        return segment.substring(0, lastH - 1);
    }

    /**
     * Requests the comment feed of one product and prints each response to standard output.
     *
     * @param href link the product id was derived from (currently unused)
     * @param pid  product id placed in the {@code productId} query parameter
     * @throws IOException if a request fails
     */
    private static void extractComment(String href, String pid) throws IOException {

        // Only a single page of comments (page=1) is requested for now.
        for (int i = 1; i < 2; i++) {
            String curl = "https://sclub.jd.com/comment/productPageComments.action?callback=fetchJSON_comment98vv80&productId=" + pid + "&score=0&sortType=5&page=" + i + "&pageSize=10&isShadowSku=0&rid=0&fold=1";
            // The URL is a JSONP callback endpoint rather than an HTML page; without
            // ignoreContentType jsoup rejects responses whose Content-Type it does not support.
            Document doc = Jsoup.connect(curl).ignoreContentType(true).get();
            System.out.println(doc);
            System.out.println("----------------------------");
        }
    }
}
